Texas: COVID-19 by County and Health Region

  1. Download COVID-19 case data
  2. Download GeoJSON county boundaries
  3. Augment the case data with health region info
In [1]:
from datetime import datetime, timedelta
import math
import os
import time
import json


from plotly.offline import init_notebook_mode, iplot
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter, ColumnDataSource
from bokeh.plotting import figure, output_file, show
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# bokeh: load BokehJS into the notebook so that later show() calls render inline
# https://docs.bokeh.org/en/latest/docs/user_guide/jupyter.html#userguide-jupyter-notebook
output_notebook()

# load data: check if we have a fresh local version (8 hours ago or newer)
#           if we don't have a fresh version, pull down a remote csv
def download_data_source(local_path):
    """Download latest data and save to param:local_path

    The response is streamed into a temporary sibling file
    (``<local_path>.part``) and moved over ``local_path`` only once the whole
    body has been written. A failed or interrupted request therefore never
    leaves a truncated csv with a fresh modification time behind.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.RequestException: connection failure or timeout.
    """
    import requests

    data_source = (
        "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
    )
    tmp_path = f"{local_path}.part"
    try:
        # timeout: requests waits indefinitely by default.
        # with-block: releases the streamed connection even when an error occurs.
        with requests.get(data_source, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            with open(tmp_path, "wb") as openfile:
                for block in resp.iter_content(1024):
                    openfile.write(block)
        # atomic swap: readers see either the old file or the complete new one
        os.replace(tmp_path, local_path)
    finally:
        # only still present when the download did not complete
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


local_path = os.path.join(".", "us-counties.csv")
max_age_hours = 8  # the cached csv is refreshed once it is at least this old
if os.path.isfile(local_path):
    mtime = int(os.stat(local_path).st_mtime)
    now = int(time.time())
    age = (now - mtime) / 60 / 60  # seconds -> hours
    if age >= max_age_hours:
        print("Local Data: is stale - downloading")
        download_data_source(local_path)
else:
    print("Local Data: not found - downloading")
    download_data_source(local_path)

print("Local Data: loading from file")
# fips is read as text rather than as a number
df = pd.read_csv(local_path, dtype={"fips": str})
# filter to only texas data; .copy() because later cells assign new columns to
# this frame, and assigning into a filtered slice of the full frame triggers
# pandas' SettingWithCopyWarning
df = df[df.state == "Texas"].copy()
df.head()
Loading BokehJS ...
Local Data: loading from file
Out[1]:
date county state fips cases deaths
143 2020-02-12 Bexar Texas 48029 1 0
154 2020-02-13 Bexar Texas 48029 2 0
165 2020-02-14 Bexar Texas 48029 2 0
176 2020-02-15 Bexar Texas 48029 2 0
187 2020-02-16 Bexar Texas 48029 2 0
In [2]:
# download geojson
# https://github.com/TNRIS/tx.geojson/blob/master/counties/tx_counties.geojson
def download_geojson(local_path):
    """Download the county geojson (keyed by fips) and save to param:local_path

    The response is streamed into a temporary sibling file
    (``<local_path>.part``) and moved over ``local_path`` only once the whole
    body has been written, so an interrupted request cannot leave a truncated
    (unparseable) json file that later runs would treat as already downloaded.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.RequestException: connection failure or timeout.
    """
    import requests

    data_source = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
    tmp_path = f"{local_path}.part"
    try:
        # timeout: requests waits indefinitely by default.
        # with-block: releases the streamed connection even when an error occurs.
        with requests.get(data_source, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            with open(tmp_path, "wb") as openfile:
                for block in resp.iter_content(1024):
                    openfile.write(block)
        # atomic swap: the target path only ever holds a complete download
        os.replace(tmp_path, local_path)
    finally:
        # only still present when the download did not complete
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


geojson_local_path = os.path.join(".", "geojson-counties-fips.json")

# fetch the file only when no local copy exists yet
if not os.path.isfile(geojson_local_path):
    print("Geojson: downloading")
    download_geojson(geojson_local_path)

# parse straight from the file handle
with open(geojson_local_path) as openfile:
    geojson = json.load(openfile)
print("Geojson: loaded data")

# peek at the first 100 characters of the pretty-printed document
print(json.dumps(geojson, indent=2, sort_keys=True)[0:100])
Geojson: loaded data
{
  "features": [
    {
      "geometry": {
        "coordinates": [
          [
            [
     
In [3]:
# https://hhs.texas.gov/sites/default/files/documents/about-hhs/hhs-regional-map.pdf
from texas_doh_regions import (
    region1,
    region2,
    region3,
    region4,
    region5,
    region6,
    region7,
    region8,
    region9,
    region10,
)

# Maps a "region_<n>" key to the collection of county names in that region.
# Every key must follow the "region_<n>" pattern: get_region() derives the
# display label from the key by turning "_" into a space and title-casing it,
# so "region_10" becomes "Region 10" (the former key "region1_0" produced
# "Region1 0").
region_county_dict = dict(
    region_1=region1,
    region_2=region2,
    region_3=region3,
    region_4=region4,
    region_5=region5,
    region_6=region6,
    region_7=region7,
    region_8=region8,
    region_9=region9,
    region_10=region10,
)


def get_region(county):
    """Return the display label of the first region listing param:county.

    The label is the region_county_dict key with "_" replaced by a space and
    title-cased. Returns None when no region contains the county.
    """
    labels = (
        key.replace("_", " ").title()
        for key, members in region_county_dict.items()
        if county in members
    )
    return next(labels, None)


# label every row with the region of its county (None when no region matches)
county_regions = df["county"].apply(get_region)
df["region"] = county_regions
df.head()
Out[3]:
date county state fips cases deaths region
143 2020-02-12 Bexar Texas 48029 1 0 Region 8
154 2020-02-13 Bexar Texas 48029 2 0 Region 8
165 2020-02-14 Bexar Texas 48029 2 0 Region 8
176 2020-02-15 Bexar Texas 48029 2 0 Region 8
187 2020-02-16 Bexar Texas 48029 2 0 Region 8
In [4]:
# import county population data
with open("tx-county-population--modified.json", "r") as openfile:
    tx_county_data = json.loads(openfile.read())

df["population"] = df.county.apply(lambda county: tx_county_data[county]["Pop"])
df["cases_pop"] = df.apply(lambda row: row.cases / row.population, axis=1)
df["deaths_pop"] = df.apply(lambda row: row.deaths / row.population, axis=1)
df["death_rate"] = df.apply(lambda row: row.deaths / row.cases, axis=1)
df.head()
Out[4]:
date county state fips cases deaths region population cases_pop deaths_pop death_rate
143 2020-02-12 Bexar Texas 48029 1 0 Region 8 1986049 5.035122e-07 0.0 0.0
154 2020-02-13 Bexar Texas 48029 2 0 Region 8 1986049 1.007024e-06 0.0 0.0
165 2020-02-14 Bexar Texas 48029 2 0 Region 8 1986049 1.007024e-06 0.0 0.0
176 2020-02-15 Bexar Texas 48029 2 0 Region 8 1986049 1.007024e-06 0.0 0.0
187 2020-02-16 Bexar Texas 48029 2 0 Region 8 1986049 1.007024e-06 0.0 0.0
In [5]:
# Choropleth Maps
#
# Constants shared by the map cells
# (the "cloropleth" spelling in the variable name is kept: the map cells
# reference it by that name)
tx_center = dict(lat=31.169621, lon=-99.683617)
default_cloropleth_kwargs = {
    "geojson": geojson,
    "locations": "fips",
    "color_continuous_scale": "Plasma",
    "mapbox_style": "carto-positron",
    "zoom": 4.2,
    "center": tx_center,
    "opacity": 0.5,
}
In [6]:
# Map: case count per row, colour scale spanning the observed range
df_cases_unique = df.cases.unique()
_min, _max = min(df_cases_unique), max(df_cases_unique)
fig = px.choropleth_mapbox(
    df,
    color="cases",
    range_color=(_min, _max),
    labels=dict(cases="cases"),
    hover_data=["county", "cases", "population", "cases_pop"],
    **default_cloropleth_kwargs
)

# no margins: let the map fill the whole output area
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
In [7]:
# Map: cases as a share of county population.
# Series.min()/max() skip NaN; builtin min()/max() over an array containing
# NaN return an order-dependent result.
_min = df["cases_pop"].min()
_max = df["cases_pop"].max()
fig = px.choropleth_mapbox(
    df,
    color="cases_pop",
    range_color=(_min, _max),
    # cases_pop is cases / population, a 0-1 fraction and not a percentage
    labels={"cases_pop": "Cases per Capita"},
    hover_data=["county", "cases", "population", "cases_pop"],
    **default_cloropleth_kwargs
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()
In [8]:
# Map: deaths as a share of county population.
# Series.min()/max() skip NaN; builtin min()/max() over an array containing
# NaN return an order-dependent result.
_min = df["deaths_pop"].min()
_max = df["deaths_pop"].max()
fig = px.choropleth_mapbox(
    df,
    color="deaths_pop",
    range_color=(_min, _max),
    # deaths_pop is deaths / population, a per-capita fraction and not a count
    labels={"deaths_pop": "Deaths per Capita"},
    hover_data=["county", "deaths", "population", "deaths_pop"],
    **default_cloropleth_kwargs
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()
In [9]:
# Map: deaths / cases.
# death_rate can hold NaN or inf when a row has 0 cases. Builtin min()/max()
# over such values give an order-dependent (NaN) or infinite colour range, so
# the range is taken over the finite values only.
df_death_rate_unique = df.death_rate.unique()
finite_death_rates = df_death_rate_unique[np.isfinite(df_death_rate_unique)]
_min, _max = finite_death_rates.min(), finite_death_rates.max()
fig = px.choropleth_mapbox(
    df,
    color="death_rate",
    range_color=(_min, _max),
    labels={"death_rate": "Mortality Rate"},
    hover_data=["county", "death_rate", "deaths", "cases", "population"],
    **default_cloropleth_kwargs
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()
In [10]:
def function(df):
    """Build a stacked-area Bokeh figure of total cases and deaths per date.

    Args:
        df: frame with at least "date", "cases" and "deaths" columns. Rows that
            share a date are summed. "date" may hold text or datetimes.

    Returns:
        The Bokeh figure; it is not shown here, pass it to ``show()``.

    Raises:
        ValueError: if df contains no rows to plot.
    """
    # select only the numeric columns that are plotted: "deaths" is required
    # below, and summing the text "county" column is meaningless
    totals = df[["date", "cases", "deaths"]].groupby(by="date").sum()
    if totals.empty:
        raise ValueError("function(df): no rows to plot")

    # a datetime axis needs real timestamps, not date strings
    totals.index = pd.to_datetime(totals.index)
    totals = totals.sort_index()

    # varea_stack draws cases on top of deaths, so the y axis must fit their sum
    stack_top = (totals["cases"] + totals["deaths"]).max()
    p = figure(
        title="Texas: Cases by Health Region",
        x_axis_label="Date",
        y_axis_label="# of Cases & Deaths",
        x_axis_type="datetime",
        y_range=(0, max(1, int(stack_top * 1.20))),
        x_range=(totals.index.min(), totals.index.max() + timedelta(days=2)),
        plot_width=880,
        tools="pan,wheel_zoom,box_zoom,reset",
    )
    # add actual values as an area graph
    source = ColumnDataSource(
        data=dict(x=totals.index, cases=totals["cases"], deaths=totals["deaths"])
    )
    p.varea_stack(["deaths", "cases"], x="x", color=("red", "lightblue"), source=source)
    # TODO: trend-line (polyfit) overlay is not implemented yet

    # Ticks on a datetime axis are milliseconds since the epoch (UTC).
    # JS getMonth()/getUTCMonth() are zero-based, hence the "+ 1".
    p.xaxis.formatter = FuncTickFormatter(
        code="""
        let date = new Date(tick);
        return `${date.getUTCDate()}-${date.getUTCMonth() + 1}-${date.getUTCFullYear()}`
    """
    )
    return p


# build the figure from the Texas frame and display it
show(function(df))